import { describe, expect, it } from 'bun:test'
import * as fs from 'fs'
import * as path from 'path'

import { astToHTML } from './html'
import { parser } from './parse'

type SpecExample = {
  markdown: string
  html: string
  example: number
  start_line: number
  end_line: number
  section: string
}

const specExamples: SpecExample[] = JSON.parse(
  fs.readFileSync(path.join(__dirname, 'commonmark-spec.json'), 'utf8')
)

/**
 * Entities used in CommonMark spec examples (test-only)
 */
const SPEC_ENTITIES: Record<string, string> = {
  copy: '©',
  AElig: 'Æ',
  aelig: 'æ',
  Dcaron: 'Ď',
  dcaron: 'ď',
  frac34: '¾',
  HilbertSpace: 'ℋ',
  DifferentialD: 'ⅆ',
  ClockwiseContourIntegral: '∲',
  ouml: 'ö',
  Ouml: 'Ö',
  auml: 'ä',
  Auml: 'Ä',
  ngE: '≧̸',
  nge: '≧̸',
}

/**
 * Decode entity references using spec entities
 */
function decodeSpecEntities(text: string): string {
  const core = ['amp', 'apos', 'gt', 'lt', 'quot', 'nbsp']
  return text.replace(/&([a-zA-Z][a-zA-Z0-9]*);/g, (m, n) =>
    core.indexOf(n) !== -1
      ? m
      : SPEC_ENTITIES[n] || SPEC_ENTITIES[n.toLowerCase()] || m
  )
}

/**
 * Post-process HTML: Normalize entities for spec test comparison
 * - Decodes entities in Markdown link URLs, titles, and code fence language attributes
 * - Decodes entities in text content
 * - Preserves entities in raw HTML attributes (pass through as-is per CommonMark spec)
 * - Removes zero-width spaces inserted after escaped & to prevent entity decoding
 */
function postProcessHTML(html: string): string {
  // Remove zero-width spaces inserted after escaped & to prevent entity decoding
  html = html.replace(/\u200B/g, '')

  // Decode entities in Markdown link href attributes
  // Markdown links: <p><a href="/f&ouml;"> (decode and URL-encode entities)
  // Raw HTML: <a href="&ouml;.html"> (preserve entities - no closing tag)
  // Strategy: Only decode href in <a> tags that have closing </a> tags (Markdown links)
  html = html.replace(/<a href="([^"]*)"[^>]*>[\s\S]*?<\/a>/g, fullMatch => {
    // This is a link with closing tag (Markdown link) - decode entities in href and title
    return fullMatch
      .replace(/href="([^"]*)"/, (m, url) => {
        const decoded = decodeSpecEntities(url)
        // URL-encode non-ASCII characters after decoding entities
        // Per CommonMark spec: preserve percent-encoded sequences
        if (decoded !== url) {
          let needsEncoding = false
          for (let i = 0; i < decoded.length; i++) {
            if (decoded.charCodeAt(i) > 127) {
              needsEncoding = true
              break
            }
          }
          if (needsEncoding) {
            // Preserve percent-encoded sequences while encoding non-ASCII characters
            var encoded = ''
            var i = 0
            while (i < decoded.length) {
              // Check for percent-encoded sequence (%XX where XX is hex)
              if (
                decoded[i] === '%' &&
                i + 2 < decoded.length &&
                /[0-9A-Fa-f]/.test(decoded[i + 1]) &&
                /[0-9A-Fa-f]/.test(decoded[i + 2])
              ) {
                // Preserve percent-encoded sequence as-is
                encoded += decoded[i] + decoded[i + 1] + decoded[i + 2]
                i += 3
              } else {
                // Encode this character if needed
                var char = decoded[i]
                var code = char.charCodeAt(0)
                if (code > 127) {
                  // Non-ASCII: encode using encodeURIComponent
                  encoded += encodeURIComponent(char)
                } else {
                  // ASCII: preserve as-is (URL-safe characters)
                  encoded += char
                }
                i++
              }
            }
            return `href="${encoded}"`
          }
        }
        return `href="${decoded}"`
      })
      .replace(/title="([^"]*)"/, (m, title) => {
        return `title="${decodeSpecEntities(title)}"`
      })
  })

  // Decode entities in class attributes (for code fence language)
  html = html.replace(/<code class="([^"]*)"/g, (m, cls) => {
    const decoded = decodeSpecEntities(cls)
    return `<code class="${decoded}"`
  })

  // Temporarily mark other attribute values to preserve them as-is
  const attrPlaceholders: string[] = []
  html = html.replace(/="([^"]*)"/g, (m, value) => {
    if (
      m.startsWith('href=') ||
      m.startsWith('title=') ||
      m.startsWith('class=')
    ) {
      return m // Already processed above
    }
    const placeholder = `__ATTR_${attrPlaceholders.length}__`
    attrPlaceholders.push(value)
    return `="${placeholder}"`
  })

  // Decode entities in text content
  // Note: &amp;entity; should NOT be decoded because the & is already escaped
  // Only decode &entity; patterns, not &amp;entity; patterns
  const core = ['amp', 'apos', 'gt', 'lt', 'quot', 'nbsp']
  html = html.replace(/&([a-zA-Z][a-zA-Z0-9]*);/g, (m, n) =>
    core.indexOf(n) !== -1
      ? m
      : SPEC_ENTITIES[n] || SPEC_ENTITIES[n.toLowerCase()] || m
  )

  // Restore other attribute values (preserving entities as-is)
  html = html.replace(/="__ATTR_(\d+)__"/g, (m, idx) => {
    return `="${attrPlaceholders[parseInt(idx)]}"`
  })

  return html
}

function normalizeHtml(html: string): string {
  return html.replace(/\n\s*/g, '').replace(/>\s+</g, '><').trim()
}

describe('CommonMark 0.31.2 Specification', () => {
  it.each(specExamples)(
    'Example $example: $section (lines $start_line-$end_line)',
    ({ markdown, html: expectedHtml, section }) => {
      // Enable GFM extensions for autolinks and tagfilter tests
      // Note: angle bracket autolinks (<https://...>) are part of core CommonMark spec
      // GFM bare URL autolinks (www., http:// without <>) are extensions
      const isGFMAutolinkTest = section === 'Autolinks (extension)'
      const isTagfilterTest = section === 'tagfilter'

      const ast = parser(markdown, {
        // Bare URL autolinks are GFM extensions - only enable for GFM autolink tests
        // Angle bracket autolinks are handled separately and are always enabled
        disableAutoLink: !isGFMAutolinkTest,
        tagfilter: isTagfilterTest, // Tagfilter is on by default, but most CommonMark tests expect it off
        slugify: () => '',
      })

      const actualHtml = postProcessHTML(
        astToHTML(ast, { tagfilter: isTagfilterTest })
      )

      expect(normalizeHtml(actualHtml)).toBe(normalizeHtml(expectedHtml))
    }
  )
})
